# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 03_attendance.R
### This script creates attendance figures and tables.
# ----
# Attendance figure ----
# Convert data to long format
bimli_attendance_long <- bimli %>%
  select(child_code, treatment, session1, session2, session3, session4, village) %>%
  pivot_longer(cols = starts_with("session"), 
               names_to = "session", 
               values_to = "attendance") %>%
  mutate(
    session = as.factor(gsub("session", "", session)),
    treatment = case_when(
      treatment == "Media Literacy" ~ "Treatment",
      treatment == "Spoken English" ~ "Control"
    ),
    attendance = as.numeric(attendance) # Ensure attendance is numeric
  )

# Specify the survey design with clustering at the village level
design_att <- svydesign(id = ~village, 
                        data = bimli_attendance_long, 
                        weights = ~1)

# Fit a model using svyglm with treatment, session, and their interaction
attendance_model <- svyglm(attendance ~ treatment * session, 
                           design = design_att)

# Create a new data frame for predictions
pred_data_att <- expand.grid(
  treatment = c("Treatment", "Control"),
  session = levels(bimli_attendance_long$session)
)

# Predict attendance and get standard errors
pred_att <- as.data.frame(predict(attendance_model, 
                                  newdata = pred_data_att, 
                                  type = "response", se.fit = TRUE))

# Add predicted values and standard errors to pred_data by extracting components separately
pred_data_att <- pred_data_att %>%
  mutate(
    predicted_attendance = pred_att$response, # Extract the predicted values
    se_attendance = pred_att$SE # Extract standard errors from the "SE" attribute
  )


# Plot predicted attendance with clustered SEs
ggplot(pred_data_att, aes(x = session, y = predicted_attendance, 
                          color = treatment, group = treatment)) +
  geom_line() +
  geom_errorbar(aes(ymin = predicted_attendance - 1.96 * se_attendance, 
                    ymax = predicted_attendance + 1.96 * se_attendance), 
                width = 0.1) +
  geom_point() +
  labs(x = "Session", y = "Proportion in attendance", color = "") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) +
  scale_color_manual(values = 
                       c("Control" = "#1a80bb", "Treatment" = "#ea801c")) +
  theme_bw() +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(size = 12, hjust = 0),
    strip.text.y.left = element_text(angle = 0, hjust = 0),
    legend.position = "bottom"
  )

ggsave("output/figures/compliance.pdf", height = 3.6, width = 5)

# Number of sessions attended ----
compliance_table <- bimli %>%
  mutate(
    treatment  = case_when(
      treatment == "Media Literacy" ~ "Treatment",
      treatment == "Spoken English"  ~ "Control"
    ),
    compliance = as.integer(compliance)
  ) %>%
  count(compliance, treatment) %>%
  group_by(treatment) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  mutate(label = paste0(n, " (", percent(pct), ")")) %>%
  select(-n, -pct) %>%
  pivot_wider(
    names_from  = treatment,
    values_from = label,
    names_glue  = "N {treatment} (%)"
  ) %>%
  rename(`Number of sessions attended` = compliance)

compliance_table %>%
  xtable::xtable(caption = "Number of sessions attended", align = "lrrr",
                 label = "tab:attendance_number_sessions") %>%
  print(include.rownames = FALSE, 
        caption.placement = "top",
        type= 'latex',
        add.to.row = list(
          pos = list(nrow(.)), 
          command = c(
            "\\hline\n\\multicolumn{3}{l}{\\footnotesize } \\\\\n")),
        hline.after = c(-1, 0), # Add horizontal lines before and after headers
        comment = F,
        table.placement = getOption("xtable.table.placement", "h!"),
        file = paste0("output/tables/tab_compliance_sessions_attended", ".tex"))

# END of 03_attendance.R ----